// Copyright 2024 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// Program hpack_gen generates a C++ file to help parse and emit HPACK.
package main

import (
	"fmt"
)

func main() {
	var statsBefore, statsAfter Stats

	buildTree()
	validate(&rootNode)
	computeStats(&statsBefore, &rootNode)

	optimize(&rootNode)
	validate(&rootNode)
	computeStats(&statsAfter, &rootNode)

	assignTableIndices(&rootNode, 0)

	fmt.Println("// Copyright 2024 The Pigweed Authors")
	fmt.Println("//")
	fmt.Println("// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not")
	fmt.Println("// use this file except in compliance with the License. You may obtain a copy of")
	fmt.Println("// the License at")
	fmt.Println("//")
	fmt.Println("//     https://www.apache.org/licenses/LICENSE-2.0")
	fmt.Println("//")
	fmt.Println("// Unless required by applicable law or agreed to in writing, software")
	fmt.Println("// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT")
	fmt.Println("// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the")
	fmt.Println("// License for the specific language governing permissions and limitations under")
	fmt.Println("// the License.")
	fmt.Println("//")
	fmt.Println("// AUTOGENERATED FILE, DO NOT EDIT")
	fmt.Println("//")
	fmt.Println("// To regenerate, run `go run gen/hpack_gen.go > hpack.autogen.inc`.")
	fmt.Println("// This file should be included in exactly one *.cc file.")
	fmt.Println("//")
	fmt.Println()
	fmt.Println("// clang-format off")
	printDecoderTable(&statsAfter)
	fmt.Println()
	fmt.Printf("// Decoder table stats:\n")
	fmt.Printf("//   before optimization = %+v\n", statsBefore)
	fmt.Printf("//   after  optimization = %+v\n", statsAfter)
	fmt.Println()
	printStaticResponses()
}

type NodeType int

const (
	BranchNode NodeType = iota
	OutputNode
	UnprintableNode
	EOSNode
)

type Node struct {
	t          NodeType
	output     int // if t == OutputNode, byte to output
	tableIndex int // if t == BranchNode, index in the decoder table
	child      [2]*Node
}

var rootNode = Node{t: BranchNode}

type Stats struct {
	numBranchNodes      int
	numOutputNodes      int
	numUnprintableNodes int
	numInvalidNodes     int
}

func buildTree() {
	for out, code := range huffmanTable {
		updateTree(out, code)
	}
}

func updateTree(out int, code string) {
	if len(code) == 0 {
		panic(fmt.Sprintf("code for byte %v is empty", out))
	}

	curr := &rootNode

	for i := 0; i < len(code); i++ {
		if code[i] != '0' && code[i] != '1' {
			panic(fmt.Sprintf("code for byte %v has invalid bit '%v'", out, code[i]))
		}

		bit := code[i] - '0'

		// Middle of the code: create a branch node if needed.
		if i < len(code)-1 {
			next := curr.child[bit]
			if next == nil {
				next = &Node{t: BranchNode}
				curr.child[bit] = next
			} else if next.t != BranchNode {
				panic(fmt.Sprintf("code for byte %v found non-branch node at bit number %v", out, i))
			}
			curr = next
			continue
		}

		// End of the code: create a leaf node.
		// Each code should lead to a unique leaf node.
		if curr.child[bit] != nil {
			panic(fmt.Sprintf("code for byte %v reached a duplicate leaf node", out))
		}

		switch {
		case out == EOS:
			curr.child[bit] = &Node{t: EOSNode}
		case out < 32 || 127 < out:
			// Unprintable characters are allowed by the Huffman code but not in gRPC.
			// See: https://github.com/grpc/grpc/blob/v1.60.x/doc/PROTOCOL-HTTP2.md#requests.
			curr.child[bit] = &Node{t: UnprintableNode}
		default:
			curr.child[bit] = &Node{t: OutputNode, output: out}
		}
	}
}

func validate(node *Node) {
	// The binary tree should be complete: there should not be missing leaf nodes.
	if node == nil {
		panic("unexpected nil in binary tree")
	}
	if node.t != BranchNode {
		if node.child[0] != nil || node.child[1] != nil {
			panic(fmt.Sprintf("unexpected child under node %+v", node))
		}
		return
	}
	validate(node.child[0])
	validate(node.child[1])
}

func optimize(node *Node) {
	if node.t != BranchNode {
		return
	}
	optimize(node.child[0])
	optimize(node.child[1])
	// Collapse each unprintable subtree into a single unprintable node.
	if node.child[0].t == UnprintableNode && node.child[1].t == UnprintableNode {
		node.t = UnprintableNode
		node.child[0] = nil
		node.child[1] = nil
	}
}

func computeStats(stats *Stats, node *Node) {
	if node == nil {
		stats.numInvalidNodes++
		return
	}
	switch node.t {
	case BranchNode:
		stats.numBranchNodes++
		computeStats(stats, node.child[0])
		computeStats(stats, node.child[1])
	case OutputNode:
		stats.numOutputNodes++
	case UnprintableNode:
		stats.numUnprintableNodes++
	case EOSNode:
		stats.numInvalidNodes++
	}
}

func assignTableIndices(node *Node, idx int) int {
	if node.t != BranchNode {
		return idx
	}
	node.tableIndex = idx
	idx++
	// We use a preorder traversal to ensure the root node has index 0.
	idx = assignTableIndices(node.child[0], idx)
	idx = assignTableIndices(node.child[1], idx)
	return idx
}

const decoderTablePrefix = `
// Huffman decoder table. Decoding starts at index=0. For each bit, we inspect
// kHuffmanDecoderTable[index][bit] and take an action based on that value:
//
//   * If the value matches 0b0..._...., set index=value
//   * If the value matches 0b1xxx_xxxx, output byte 32 + xx_xxxx, set index=0
//   * If the value matches 0b1111_1110, fail: unprintable character
//   * If the value matches 0b1111_1111, fail: decoder entered an invalid state
//
static constexpr std::array<uint8_t[2], %d> kHuffmanDecoderTable = {{
`
const decoderTableSuffix = `
}};
`

func printDecoderTable(stats *Stats) {
	fmt.Printf(decoderTablePrefix, stats.numBranchNodes)
	printDecoderTableEntries(&rootNode)
	fmt.Print(decoderTableSuffix)
}

func printDecoderTableEntries(node *Node) {
	if node.t != BranchNode {
		return
	}

	// Print nodes in preorder, which is the same order the indices were created.
	fmt.Printf("  /*%v=*/ {%v, %v},\n",
		node.tableIndex,
		toDecoderTableEntry(node.child[0]),
		toDecoderTableEntry(node.child[1]))

	printDecoderTableEntries(node.child[0])
	printDecoderTableEntries(node.child[1])
}

func toDecoderTableEntry(node *Node) uint8 {
	switch node.t {
	case BranchNode:
		if node.tableIndex > 127 {
			panic(fmt.Sprintf("BranchNode index %d > 127", node.tableIndex))
		}
		return uint8(node.tableIndex)
	case OutputNode:
		return uint8(node.output-32) | 0b1000_0000
	case UnprintableNode:
		return 0b1111_1110
	case EOSNode:
		// RFC 7541 §5.2: "A Huffman-encoded string literal containing the EOS
		// symbol MUST be treated as a decoding error."
		return 0b1111_1111
	}
	panic(fmt.Sprintf("unexpected node type %v", node.t))
}

const responseHeaderFieldsPrefix = `
// HPACK-encoded header fields which form grpc Response-Headers.
// These are the same for every response.
static constexpr std::array<uint8_t, %d> kResponseHeaderFields = {
`

const responseHeaderFieldsSuffix = `
};
`

const maxStatusCode = 16
const responseTrailerFieldsPrefix = `
// HPACK-encoded header fields which form grpc Trailers. All response Trailers
// are identical except for the status code.
//
// This is indexed by pw::Status::Code, which happens to be identical to grpc's
// status code.
struct ResponseTrailerPayload {
  uint32_t size;
  std::array<uint8_t, %d> bytes;
};
static constexpr std::array<ResponseTrailerPayload, %d> kResponseTrailerFields = {{
`

const responseTrailerFieldsSuffix = `
}};
`

func printStaticResponses() {
	respHeaderFields := buildResponseHeaderFields()
	fmt.Printf(responseHeaderFieldsPrefix, len(respHeaderFields))
	fmt.Printf("  %s", fmtBytes(respHeaderFields))
	fmt.Printf("%s", responseHeaderFieldsSuffix)

	maxLen := len(buildResponseTrailerFields(maxStatusCode))
	fmt.Printf(responseTrailerFieldsPrefix, maxLen, maxStatusCode+1)
	for code := 0; code <= maxStatusCode; code++ {
		payload := buildResponseTrailerFields(code)
		fmt.Printf("  {.size=%d, .bytes={%s}},\n", len(payload), fmtBytes(payload))
	}
	fmt.Printf("%s", responseTrailerFieldsSuffix)
}

func fmtBytes(bytes []byte) string {
	var out string
	for i, b := range bytes {
		if i < len(bytes)-1 {
			out += fmt.Sprintf("0x%x, ", b)
		} else {
			out += fmt.Sprintf("0x%x", b)
		}
	}
	return out
}

// Response-Headers → ":status 200" "content-type application/grpc"
func buildResponseHeaderFields() []byte {
	var out []byte
	out = append(out, 0b1000_1000) // RFC 7541 §6.1: index 8 is ":status" "200"
	out = append(out, 0b0101_1111) // RFC 7541 §6.2.1: index 31 is "content-type"
	out = append(out, literalEncodeWithLength("application/grpc")...)
	return out
}

// Trailers → "grpc-status <DIGITS>"
func buildResponseTrailerFields(statusCode int) []byte {
	var out []byte
	out = append(out, 0b0100_0000) // RFC 7541 §6.2.1: literal name and value
	out = append(out, literalEncodeWithLength("grpc-status")...)
	if statusCode >= 10 {
		out = append(out, 0b0000_0010)             // RFC 7541 §6.2.1: length of value w/out huffman
		out = append(out, byte(statusCode/10)+'0') // RFC 7541 §6.2.1: value
		out = append(out, byte(statusCode%10)+'0')
	} else {
		out = append(out, 0b0000_0001)          // RFC 7541 §6.2.1: length of value w/out huffman
		out = append(out, byte(statusCode)+'0') // RFC 7541 §6.2.1: value
	}
	return out
}

func literalEncodeWithLength(str string) []byte {
	// RFC 7541 §6.2.1: we are generating this structure with H=0. Since the
	// strings are short, there is minimal benefit to using a Huffman encoding,
	// at most 2-3 bytes per string.
	// +---+---+-----------------------+
	// | H |     Value Length (7+)     |
	// +---+---------------------------+
	// | Value String (Length octets)  |
	// +-------------------------------+
	//
	// The length must be encoded across multiple bytes if 128 or larger. See RFC
	// 7541 §5.2. We don't write any strings that large.
	if len(str) >= 128 {
		panic(fmt.Sprintf("cannot encode string of length %v: %#v", len(str), str))
	}
	return append([]byte{byte(len(str))}, str...)
}

// Special symbol for Huffman EOS.
// See RFC 7541 Appendix B.
const EOS = 256

// Mapping from symbol to Huffman code.
// Taken verbatim from RFC 7541 Appendix B.
var huffmanTable = map[int]string{
	0:   "1111111111000",
	1:   "11111111111111111011000",
	2:   "1111111111111111111111100010",
	3:   "1111111111111111111111100011",
	4:   "1111111111111111111111100100",
	5:   "1111111111111111111111100101",
	6:   "1111111111111111111111100110",
	7:   "1111111111111111111111100111",
	8:   "1111111111111111111111101000",
	9:   "111111111111111111101010",
	10:  "111111111111111111111111111100",
	11:  "1111111111111111111111101001",
	12:  "1111111111111111111111101010",
	13:  "111111111111111111111111111101",
	14:  "1111111111111111111111101011",
	15:  "1111111111111111111111101100",
	16:  "1111111111111111111111101101",
	17:  "1111111111111111111111101110",
	18:  "1111111111111111111111101111",
	19:  "1111111111111111111111110000",
	20:  "1111111111111111111111110001",
	21:  "1111111111111111111111110010",
	22:  "111111111111111111111111111110",
	23:  "1111111111111111111111110011",
	24:  "1111111111111111111111110100",
	25:  "1111111111111111111111110101",
	26:  "1111111111111111111111110110",
	27:  "1111111111111111111111110111",
	28:  "1111111111111111111111111000",
	29:  "1111111111111111111111111001",
	30:  "1111111111111111111111111010",
	31:  "1111111111111111111111111011",
	32:  "010100",
	33:  "1111111000",
	34:  "1111111001",
	35:  "111111111010",
	36:  "1111111111001",
	37:  "010101",
	38:  "11111000",
	39:  "11111111010",
	40:  "1111111010",
	41:  "1111111011",
	42:  "11111001",
	43:  "11111111011",
	44:  "11111010",
	45:  "010110",
	46:  "010111",
	47:  "011000",
	48:  "00000",
	49:  "00001",
	50:  "00010",
	51:  "011001",
	52:  "011010",
	53:  "011011",
	54:  "011100",
	55:  "011101",
	56:  "011110",
	57:  "011111",
	58:  "1011100",
	59:  "11111011",
	60:  "111111111111100",
	61:  "100000",
	62:  "111111111011",
	63:  "1111111100",
	64:  "1111111111010",
	65:  "100001",
	66:  "1011101",
	67:  "1011110",
	68:  "1011111",
	69:  "1100000",
	70:  "1100001",
	71:  "1100010",
	72:  "1100011",
	73:  "1100100",
	74:  "1100101",
	75:  "1100110",
	76:  "1100111",
	77:  "1101000",
	78:  "1101001",
	79:  "1101010",
	80:  "1101011",
	81:  "1101100",
	82:  "1101101",
	83:  "1101110",
	84:  "1101111",
	85:  "1110000",
	86:  "1110001",
	87:  "1110010",
	88:  "11111100",
	89:  "1110011",
	90:  "11111101",
	91:  "1111111111011",
	92:  "1111111111111110000",
	93:  "1111111111100",
	94:  "11111111111100",
	95:  "100010",
	96:  "111111111111101",
	97:  "00011",
	98:  "100011",
	99:  "00100",
	100: "100100",
	101: "00101",
	102: "100101",
	103: "100110",
	104: "100111",
	105: "00110",
	106: "1110100",
	107: "1110101",
	108: "101000",
	109: "101001",
	110: "101010",
	111: "00111",
	112: "101011",
	113: "1110110",
	114: "101100",
	115: "01000",
	116: "01001",
	117: "101101",
	118: "1110111",
	119: "1111000",
	120: "1111001",
	121: "1111010",
	122: "1111011",
	123: "111111111111110",
	124: "11111111100",
	125: "11111111111101",
	126: "1111111111101",
	127: "1111111111111111111111111100",
	128: "11111111111111100110",
	129: "1111111111111111010010",
	130: "11111111111111100111",
	131: "11111111111111101000",
	132: "1111111111111111010011",
	133: "1111111111111111010100",
	134: "1111111111111111010101",
	135: "11111111111111111011001",
	136: "1111111111111111010110",
	137: "11111111111111111011010",
	138: "11111111111111111011011",
	139: "11111111111111111011100",
	140: "11111111111111111011101",
	141: "11111111111111111011110",
	142: "111111111111111111101011",
	143: "11111111111111111011111",
	144: "111111111111111111101100",
	145: "111111111111111111101101",
	146: "1111111111111111010111",
	147: "11111111111111111100000",
	148: "111111111111111111101110",
	149: "11111111111111111100001",
	150: "11111111111111111100010",
	151: "11111111111111111100011",
	152: "11111111111111111100100",
	153: "111111111111111011100",
	154: "1111111111111111011000",
	155: "11111111111111111100101",
	156: "1111111111111111011001",
	157: "11111111111111111100110",
	158: "11111111111111111100111",
	159: "111111111111111111101111",
	160: "1111111111111111011010",
	161: "111111111111111011101",
	162: "11111111111111101001",
	163: "1111111111111111011011",
	164: "1111111111111111011100",
	165: "11111111111111111101000",
	166: "11111111111111111101001",
	167: "111111111111111011110",
	168: "11111111111111111101010",
	169: "1111111111111111011101",
	170: "1111111111111111011110",
	171: "111111111111111111110000",
	172: "111111111111111011111",
	173: "1111111111111111011111",
	174: "11111111111111111101011",
	175: "11111111111111111101100",
	176: "111111111111111100000",
	177: "111111111111111100001",
	178: "1111111111111111100000",
	179: "111111111111111100010",
	180: "11111111111111111101101",
	181: "1111111111111111100001",
	182: "11111111111111111101110",
	183: "11111111111111111101111",
	184: "11111111111111101010",
	185: "1111111111111111100010",
	186: "1111111111111111100011",
	187: "1111111111111111100100",
	188: "11111111111111111110000",
	189: "1111111111111111100101",
	190: "1111111111111111100110",
	191: "11111111111111111110001",
	192: "11111111111111111111100000",
	193: "11111111111111111111100001",
	194: "11111111111111101011",
	195: "1111111111111110001",
	196: "1111111111111111100111",
	197: "11111111111111111110010",
	198: "1111111111111111101000",
	199: "1111111111111111111101100",
	200: "11111111111111111111100010",
	201: "11111111111111111111100011",
	202: "11111111111111111111100100",
	203: "111111111111111111111011110",
	204: "111111111111111111111011111",
	205: "11111111111111111111100101",
	206: "111111111111111111110001",
	207: "1111111111111111111101101",
	208: "1111111111111110010",
	209: "111111111111111100011",
	210: "11111111111111111111100110",
	211: "111111111111111111111100000",
	212: "111111111111111111111100001",
	213: "11111111111111111111100111",
	214: "111111111111111111111100010",
	215: "111111111111111111110010",
	216: "111111111111111100100",
	217: "111111111111111100101",
	218: "11111111111111111111101000",
	219: "11111111111111111111101001",
	220: "1111111111111111111111111101",
	221: "111111111111111111111100011",
	222: "111111111111111111111100100",
	223: "111111111111111111111100101",
	224: "11111111111111101100",
	225: "111111111111111111110011",
	226: "11111111111111101101",
	227: "111111111111111100110",
	228: "1111111111111111101001",
	229: "111111111111111100111",
	230: "111111111111111101000",
	231: "11111111111111111110011",
	232: "1111111111111111101010",
	233: "1111111111111111101011",
	234: "1111111111111111111101110",
	235: "1111111111111111111101111",
	236: "111111111111111111110100",
	237: "111111111111111111110101",
	238: "11111111111111111111101010",
	239: "11111111111111111110100",
	240: "11111111111111111111101011",
	241: "111111111111111111111100110",
	242: "11111111111111111111101100",
	243: "11111111111111111111101101",
	244: "111111111111111111111100111",
	245: "111111111111111111111101000",
	246: "111111111111111111111101001",
	247: "111111111111111111111101010",
	248: "111111111111111111111101011",
	249: "1111111111111111111111111110",
	250: "111111111111111111111101100",
	251: "111111111111111111111101101",
	252: "111111111111111111111101110",
	253: "111111111111111111111101111",
	254: "111111111111111111111110000",
	255: "11111111111111111111101110",
	256: "111111111111111111111111111111",
}
